library(tidyverse)
library(broom)
library(lubridate)
library(devtools)
library(dsbox)
library(ggridges)
library(tidymodels)
library(openintro)
library(plotly)
# Load the state-level COVID-19 cases-and-deaths time series from the project
# data folder; column types are guessed by read_csv().
US_deaths_cases <- read_csv(
  file = "/cloud/project/data/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv"
)
## Rows: 37380 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): submission_date, state, created_at, consent_cases, consent_deaths
## dbl (10): tot_cases, conf_cases, prob_cases, new_case, pnew_case, tot_death,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact overview of the raw table: row/column counts, column types,
# and the first few values of each column.
US_deaths_cases %>%
  glimpse()
## Rows: 37,380
## Columns: 15
## $ submission_date <chr> "02/12/2021", "03/01/2021", "08/22/2020", "08/12/2020"…
## $ state           <chr> "UT", "CO", "AR", "AS", "AS", "MA", "GA", "NYC", "AS",…
## $ tot_cases       <dbl> 359641, 438745, 56199, 0, 0, 704796, 1187107, 948436, …
## $ conf_cases      <dbl> 359641, 411869, NA, NA, NA, 659246, 937515, 782257, NA…
## $ prob_cases      <dbl> 0, 26876, NA, NA, NA, 45550, 249592, 166179, NA, NA, 4…
## $ new_case        <dbl> 1060, 677, 547, 0, 0, 451, 3829, 394, 0, 8835, 2766, 3…
## $ pnew_case       <dbl> 0, 60, 0, 0, NA, 46, 1144, 95, 0, 2003, 317, 29, 400, …
## $ tot_death       <dbl> 1785, 5952, 674, 0, 0, 17818, 21690, 33203, 0, 19190, …
## $ conf_death      <dbl> 1729, 5218, NA, NA, NA, 17458, 18725, 28130, NA, NA, 3…
## $ prob_death      <dbl> 56, 734, NA, NA, NA, 360, 2965, 5073, NA, NA, 0, 307, …
## $ new_death       <dbl> 11, 1, 11, 0, 0, 5, 7, 6, 0, 66, 3, 15, 10, 69, 0, 5, …
## $ pnew_death      <dbl> 2, 0, 0, 0, NA, 0, 0, 0, 0, 2, 0, 1, 2, 0, 0, 0, 0, 0,…
## $ created_at      <chr> "02/13/2021 02:50:08 PM", "03/01/2021 12:00:00 AM", "0…
## $ consent_cases   <chr> "Agree", "Agree", "Not agree", NA, NA, "Agree", "Agree…
## $ consent_deaths  <chr> "Agree", "Agree", "Not agree", NA, NA, "Agree", "Agree…
# Build the per-state, per-date series that feeds the choropleth below.
#
# Rows are grouped by BOTH submission_date and state. Grouping by date alone
# while carrying `state` through summarise() gave every state the national
# total for that date, so a map keyed on `state` showed one uniform colour per
# frame (and relied on multi-row summarise() results, which dplyr has
# deprecated).
#
# Result: an ungrouped tibble with columns submission_date (Date),
# total_new_cases, total_new_deaths, total_cases, total_deaths, state,
# sorted with the most recent date first.
#
# NOTE(review): "NYC" appears as its own code in `state`; if its counts are not
# already included in "NY", dropping it understates New York -- confirm.
new_US_deaths_cases <- US_deaths_cases %>%
  # Drop the codes that are not wanted on the state-level map.
  filter(!(state %in% c("NYC", "PR", "GU",
                        "VI", "MP", "RMI",
                        "AS", "PW", "FSM"))) %>%
  # Parse the "%m/%d/%Y" strings up front so grouping and sorting use real
  # dates rather than character ordering.
  mutate(submission_date = as.Date(submission_date, format = "%m/%d/%Y")) %>%
  group_by(submission_date, state) %>%
  summarise(total_new_cases = sum(new_case),
            total_new_deaths = sum(new_death),
            total_cases = sum(tot_cases),
            total_deaths = sum(tot_death),
            .groups = "drop") %>%
  # Keep the column layout callers already see (state as the last column).
  relocate(state, .after = last_col()) %>%
  arrange(desc(submission_date))

# NOTE(review): the captured console output that follows this statement in the
# file was produced before the per-state grouping (it shows national totals
# repeated for every state); re-run the script to refresh it.
new_US_deaths_cases
## # A tibble: 31,773 × 6
## # Groups:   submission_date [623]
##    submission_date total_new_cases total_new_deaths total_cases total_deaths
##    <date>                    <dbl>            <dbl>       <dbl>        <dbl>
##  1 2021-10-05                97795             1931    42579909       666441
##  2 2021-10-05                97795             1931    42579909       666441
##  3 2021-10-05                97795             1931    42579909       666441
##  4 2021-10-05                97795             1931    42579909       666441
##  5 2021-10-05                97795             1931    42579909       666441
##  6 2021-10-05                97795             1931    42579909       666441
##  7 2021-10-05                97795             1931    42579909       666441
##  8 2021-10-05                97795             1931    42579909       666441
##  9 2021-10-05                97795             1931    42579909       666441
## 10 2021-10-05                97795             1931    42579909       666441
## # … with 31,763 more rows, and 1 more variable: state <chr>
# Choropleth of new cases with one animation frame per submission date.
# Locations come from the `state` column under the 'USA-states' location mode;
# the colour range is pinned to [0, overall maximum of total_new_cases] so the
# scale is the same in every frame.
case_graph <- new_US_deaths_cases %>%
  plot_geo(locationmode = 'USA-states', frame = ~submission_date) %>%
  add_trace(
    locations = ~state,
    z = ~total_new_cases,
    zmin = 0,
    zmax = max(new_US_deaths_cases$total_new_cases),
    color = ~total_new_cases,
    colorscale = 'electric'
  )

# Render the widget.
case_graph